Each subject saw 192 trials, being 3 cycles of 64 trials. There were 40 subjects. There were 7680 rows in the full data.
First an instruction was presented until subjects dismissed it with a key press. Then a grey screen was presented for 1 second. Then a prime was presented for 2 seconds. Then a grey screen was presented for 1 second. Then the squares with dots were presented until the subject responded, with a 10 second timeout if they did not respond.
Here is a description of each column in the raw data.
Here is a table of instructions.
| vagueness | selection | instruction |
|---|---|---|
| crisp | match | Choose a square with the same number of dots as the target |
| crisp | compare | Choose a square with fewer dots than the target |
| vague | match | Choose a square with about the same number of dots as the target |
| vague | compare | Choose a square with far fewer dots than the target |
| Item | dots |
|---|---|
| Item 1 | (6,15,24) |
| Item 2 | (16,25,34) |
| Item 3 | (26,35,44) |
| Item 4 | (36,45,54) |
Source additional R files with functions for computing confidence intervals.
source("CIfunctions.R")
Get data from the raw per-subject files if necessary.
if (file.exists('data.txt')) {
dat <- read.delim(file="data.txt", header=T, stringsAsFactors=FALSE)
} else {
system(wait=TRUE, paste("head -n 1 ", "../experimentCode","/output/", "subject01.data ", "> ", "data.txt", sep=""))
system(wait=TRUE, paste("tail -q -n +2 ", "../experimentCode", "/output/", "*.data ", ">> ", "data.txt", sep=""))
}
# declare local variables
Selection_of_valid_subjects <- 40
Selection_of_rows <- 7680
Selection_of_trials_per_subject <- Selection_of_rows / Selection_of_valid_subjects # 192
# sort out borderline responses into expected near and far
# How many squares in the square they chose?
dat$RESPONSE <- as.character(dat$key)
for (row in 1 : nrow(dat) ) {
switch(dat[row,'RESPONSE'],
'L' = {dat[row, 'choice'] <- dat[row, 'Lft']},
'M' = {dat[row, 'choice'] <- dat[row, 'Mid']},
'R' = {dat[row, 'choice'] <- dat[row, 'Rgt']}
)
}
dat$Item <- dat$Itm
dat$Itm <- NULL
dat$crossed=paste('Con',dat$Cnd,':Quan',dat$Qty,':Item',dat$Item,sep="")
dat[dat$crossed=='Con1:Quan1:Item1', 'ResponseExpected'] <- 6
dat[dat$crossed=='Con1:Quan1:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con1:Quan1:Item1', 'ResponseFar'] <- 24
dat[dat$crossed=='Con1:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con1:Quan1:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con1:Quan1:Item2', 'ResponseFar'] <- 34
dat[dat$crossed=='Con1:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con1:Quan1:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con1:Quan1:Item3', 'ResponseFar'] <- 44
dat[dat$crossed=='Con1:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con1:Quan1:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con1:Quan1:Item4', 'ResponseFar'] <- 54
dat[dat$crossed=='Con1:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con1:Quan2:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con1:Quan2:Item1', 'ResponseFar'] <- 6
dat[dat$crossed=='Con1:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con1:Quan2:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con1:Quan2:Item2', 'ResponseFar'] <- 16
dat[dat$crossed=='Con1:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con1:Quan2:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con1:Quan2:Item3', 'ResponseFar'] <- 26
dat[dat$crossed=='Con1:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con1:Quan2:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con1:Quan2:Item4', 'ResponseFar'] <- 36
dat[dat$crossed=='Con2:Quan1:Item1', 'ResponseExpected'] <- 6
dat[dat$crossed=='Con2:Quan1:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con2:Quan1:Item1', 'ResponseFar'] <- 24
dat[dat$crossed=='Con2:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con2:Quan1:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con2:Quan1:Item2', 'ResponseFar'] <- 34
dat[dat$crossed=='Con2:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con2:Quan1:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con2:Quan1:Item3', 'ResponseFar'] <- 44
dat[dat$crossed=='Con2:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con2:Quan1:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con2:Quan1:Item4', 'ResponseFar'] <- 54
dat[dat$crossed=='Con2:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con2:Quan2:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con2:Quan2:Item1', 'ResponseFar'] <- 6
dat[dat$crossed=='Con2:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con2:Quan2:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con2:Quan2:Item2', 'ResponseFar'] <- 16
dat[dat$crossed=='Con2:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con2:Quan2:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con2:Quan2:Item3', 'ResponseFar'] <- 26
dat[dat$crossed=='Con2:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con2:Quan2:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con2:Quan2:Item4', 'ResponseFar'] <- 36
dat[dat$crossed=='Con3:Quan1:Item1', 'ResponseExpected'] <- 6
dat[dat$crossed=='Con3:Quan1:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con3:Quan1:Item1', 'ResponseFar'] <- 24
dat[dat$crossed=='Con3:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con3:Quan1:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con3:Quan1:Item2', 'ResponseFar'] <- 34
dat[dat$crossed=='Con3:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con3:Quan1:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con3:Quan1:Item3', 'ResponseFar'] <- 44
dat[dat$crossed=='Con3:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con3:Quan1:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con3:Quan1:Item4', 'ResponseFar'] <- 54
dat[dat$crossed=='Con3:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con3:Quan2:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con3:Quan2:Item1', 'ResponseFar'] <- 6
dat[dat$crossed=='Con3:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con3:Quan2:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con3:Quan2:Item2', 'ResponseFar'] <- 16
dat[dat$crossed=='Con3:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con3:Quan2:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con3:Quan2:Item3', 'ResponseFar'] <- 26
dat[dat$crossed=='Con3:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con3:Quan2:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con3:Quan2:Item4', 'ResponseFar'] <- 36
dat[dat$crossed=='Con4:Quan1:Item1', 'ResponseExpected'] <- 6
dat[dat$crossed=='Con4:Quan1:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con4:Quan1:Item1', 'ResponseFar'] <- 24
dat[dat$crossed=='Con4:Quan1:Item2', 'ResponseExpected'] <- 16
dat[dat$crossed=='Con4:Quan1:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con4:Quan1:Item2', 'ResponseFar'] <- 34
dat[dat$crossed=='Con4:Quan1:Item3', 'ResponseExpected'] <- 26
dat[dat$crossed=='Con4:Quan1:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con4:Quan1:Item3', 'ResponseFar'] <- 44
dat[dat$crossed=='Con4:Quan1:Item4', 'ResponseExpected'] <- 36
dat[dat$crossed=='Con4:Quan1:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con4:Quan1:Item4', 'ResponseFar'] <- 54
dat[dat$crossed=='Con4:Quan2:Item1', 'ResponseExpected'] <- 24
dat[dat$crossed=='Con4:Quan2:Item1', 'ResponseNear'] <- 15
dat[dat$crossed=='Con4:Quan2:Item1', 'ResponseFar'] <- 6
dat[dat$crossed=='Con4:Quan2:Item2', 'ResponseExpected'] <- 34
dat[dat$crossed=='Con4:Quan2:Item2', 'ResponseNear'] <- 25
dat[dat$crossed=='Con4:Quan2:Item2', 'ResponseFar'] <- 16
dat[dat$crossed=='Con4:Quan2:Item3', 'ResponseExpected'] <- 44
dat[dat$crossed=='Con4:Quan2:Item3', 'ResponseNear'] <- 35
dat[dat$crossed=='Con4:Quan2:Item3', 'ResponseFar'] <- 26
dat[dat$crossed=='Con4:Quan2:Item4', 'ResponseExpected'] <- 54
dat[dat$crossed=='Con4:Quan2:Item4', 'ResponseNear'] <- 45
dat[dat$crossed=='Con4:Quan2:Item4', 'ResponseFar'] <- 36
dat$isResponseExpected <- dat$choice == dat$ResponseExpected
dat$isResponseNear <- dat$choice == dat$ResponseNear
dat$isResponseFar <- dat$choice == dat$ResponseFar
for ( row in 1:nrow(dat) ) {
dat[row, 'switchResponse'] <-
ifelse(dat[row, 'isResponseExpected']==TRUE, 'Expected',
ifelse(dat[row, 'isResponseNear']==TRUE, 'Near', 'Far') )
}
# SUBJECT
# ensure Subject is a factor
dat$Subject=factor(paste("s",sprintf("%02d",dat$Sub),sep=""))
# TRIAL
# Trial for subject, 1 to 192
dat$Trial = rep(x = 1:Selection_of_trials_per_subject, times = Selection_of_valid_subjects)
# make a centred Trial for modeling
dat$c_Trl <-dat$Trial - mean(dat$Trial)
# make a scaled Trial for modelling
dat$s_Trl <- as.numeric(scale(dat$Trial))
# ID
# id is a unique identifier for the 7680 row data
dat$id <- factor(paste(paste(dat$Subject),
paste("t", sprintf("%03d", dat$Trial), sep="") , sep=":"))
# ITEM
# create a centred numeric item variable for modeling
dat$c_Itm <- ifelse(dat$Item==1, -.75,
ifelse(dat$Item==2, -.25,
ifelse(dat$Item==3, .25, .75)))
# make Item be a factor and assign labels
dat$Item <- factor(dat$Item, levels=c(1,2,3,4), labels=c("06:15:24", "16:25:34", "26:35:44", "36:45:54"))
# add ratios for Item
item_range_ratio = c(6 / 24, 16 / 34, 26 / 44, 36 / 54)
# 0.2500000 0.4705882 0.5909091 0.6666667
item_range_ratio_scaled = as.vector(scale(c(6 / 24, 16 / 34, 26 / 44, 36 / 54)))
# -1.3441995 -0.1316642 0.5297187 0.9461450
item_mean_ratio = c(mean(c(6 / 15, 15 / 24)), mean(c(16 / 25, 25 / 34)), mean(c(26 /35, 35 / 44)), mean(c(36 / 45, 45 / 54)))
# 0.5125000 0.6876471 0.7691558 0.8166667
item_mean_ratio_scaled = as.vector(scale(c(mean(c(6 / 15, 15 / 24)), mean(c(16 / 25, 25 / 34)), mean(c(26 /35, 35 / 44)), mean(c(36 / 45, 45 / 54)))))
# -1.37582241 -0.06614191 0.54334858 0.89861574
dat[dat$Item == "06:15:24", "item_range_ratio"] <- 0.2500000
dat[dat$Item == "16:25:34", "item_range_ratio"] <- 0.4705882
dat[dat$Item == "26:35:44", "item_range_ratio"] <- 0.5909091
dat[dat$Item == "36:45:54", "item_range_ratio"] <- 0.6666667
dat[dat$Item == "06:15:24", "item_range_ratio_scaled"] <- -1.3441995
dat[dat$Item == "16:25:34", "item_range_ratio_scaled"] <- -0.1316642
dat[dat$Item == "26:35:44", "item_range_ratio_scaled"] <- 0.5297187
dat[dat$Item == "36:45:54", "item_range_ratio_scaled"] <- 0.9461450
dat[dat$Item == "06:15:24", "item_mean_ratio"] <- 0.5125000
dat[dat$Item == "16:25:34", "item_mean_ratio"] <- 0.6876471
dat[dat$Item == "26:35:44", "item_mean_ratio"] <- 0.7691558
dat[dat$Item == "36:45:54", "item_mean_ratio"] <- 0.8166667
dat[dat$Item == "06:15:24", "item_mean_ratio_scaled"] <- -1.37582241
dat[dat$Item == "16:25:34", "item_mean_ratio_scaled"] <- -0.06614191
dat[dat$Item == "26:35:44", "item_mean_ratio_scaled"] <- 0.54334858
dat[dat$Item == "36:45:54", "item_mean_ratio_scaled"] <- 0.89861574
# VAGUENESS
# Create a factor coding for Vagueness
dat[ dat$Cnd==1 , 'Vagueness'] <- 'Vague'
dat[ dat$Cnd==2 , 'Vagueness'] <- 'Crisp'
dat[ dat$Cnd==3 , 'Vagueness'] <- 'Vague'
dat[ dat$Cnd==4 , 'Vagueness'] <- 'Crisp'
dat$Vagueness <- as.factor(dat$Vagueness)
# manually center Vagueness
dat$c_Vag <- ifelse(dat$Vagueness=='Crisp', -.5, .5)
dat$Selection = ""
dat[dat$Cnd %in% c(1,2), "Selection"] <- "matching"
dat[dat$Cnd %in% c(3,4), "Selection"] <- "comparison"
dat$Selection.sum <- ifelse(dat$Selection=="matching", .5, -.5)
dat$Selection <- as.factor(dat$Selection)
# remove redundant confusing columns
dat$Sub <- NULL
dat$Trl <- NULL
# ORDER
# give the levels of Order meaningful names
dat$Order <- factor(dat$Ord, levels=c(1,2), labels=c('LtoR','RtoL'))
# make a manually centred Order
dat$c_Ord <- ifelse(dat$Order=="LtoR",-.5,.5)
# QUANTITY
# give the levels of Quantity meaningful names
dat$Quantity <- factor(dat$Qty, levels=c(1,2), labels=c('Small','Large'))
# make a manually centred Quantity
dat$c_Qty <- ifelse(dat$Quantity=="Small",-.5,.5)
# INSTRUCTION
# add Selection of characters in the instruction # 29 30 34 36 38
dat$nchar_instr = nchar(as.character(dat$Ins))
dat$nchar_instr_scaled = as.vector(scale(nchar(as.character(dat$Ins)), scale=TRUE))
# make Instruction be a factor (17 levels)
dat$Ins <- as.factor(dat$Ins)
# RT
# add transformations of RT
dat$RT <- dat$rt; dat$rt <- NULL
dat$RT_RecSqd <- 1/dat$RT^2
dat$RT_Rec <- 1/dat$RT
dat$RT_RecSqt <- 1/sqrt(dat$RT)
dat$RT_log <- log(dat$RT)
dat$RT_sqt <- sqrt(dat$RT)
dat$RT_raw <- dat$RT
dat$RT_sqd <- dat$RT^2
# lose impossible trials
dd <- dat
dd$RT[dd$RT > 9000 ] <- NA
dd <- dd[complete.cases(dd),]
row.names(dd) <- NULL
# add preceding RT: because we removed impossible trials, the value for preceding RT for a trial following an impossible trial is the value of the trial that preceded the impossible trial.
dd$RTprev <- NA
for (s in levels(dd$Subject)) {
nrows = nrow(dd[dd$Subject==s,])
for (i in 1:nrows) {
if (i==1) {
dd[dd$Subject==s, "RTprev"][i] <- dd[dd$Subject==s, "RT"][i]
}
else
dd[dd$Subject==s, "RTprev"][i] <- dd[dd$Subject==s, "RT"][i-1]
}
}
# add transformations of previous RT
dd$RTprev_RecSqd <- 1/dd$RTprev^2
dd$RTprev_Rec <- 1/dd$RTprev
dd$RTprev_RecSqt0 <- 1/sqrt(dd$RTprev)
dd$RTprev_RecSqt <- as.vector(scale( 1/sqrt(dd$RTprev) ))
dd$RTprev_log <- log(dd$RTprev)
dd$RTprev_sqt <- sqrt(dd$RTprev)
dd$RTprev_raw <- dd$RTprev
dd$RTprev_sqd <- dd$RTprev^2
save(dd, file="data.Rda")
Compare distributions of the various transformations of RT against random samples from normal distributions with the same mean and sd to see which transformations best approximate normal distributions.
## plot the comparison of the transformations against their normal equivalents to see distributional properties using full data
# first create a data frame specially for transformations
# this won't be used for analysis, just for this plot
dat_transforms <- dat
# dfSubset says whether any data points were removed
dat_transforms$dfSubset <- "original"
# RTtype says whether the row contains data for an observed RT or
# a sample from a normal distribution with the same mean and sd
dat_transforms$RTtype <- "observed"
# create extra rows for dfs with removed (outlier) data points
dat1 <- dat_transforms # nrow 7680
dat2 <- subset(dat_transforms, RT<9000) # nrow 7677
dat2$dfSubset <- "RT<9000"
# bring the dfs back into main df
dat_transforms <- rbind(dat1,dat2)
dat_transforms$dfSubset <- factor(dat_transforms$dfSubset, levels=c("original", "RT<9000"))
# create extra rows for normally distributed transformed variables
dato = dat_transforms
datn = dat_transforms; datn$RTtype <- "normal"
dat_transforms <- rbind(dato,datn)
dat_transforms$RTtype <- factor(dat_transforms$RTtype, levels=c("observed","normal"))
# for safety
dat_transforms[dat_transforms$RTtype=="normal", c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd")] <- NA
# Create the normally distributed transformed variables, replacing the NAs declared above
for (k in levels(dat_transforms$dfSubset)){
dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_RecSqd"] <-
rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqd"]),
mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqd"]),
sd=sd(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqd"]))
dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_Rec"] <-
rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_Rec"]),
mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_Rec"]),
sd=sd(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_Rec"]))
dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_RecSqt"] <-
rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqt"]),
mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqt"]),
sd=sd(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_RecSqt"]))
dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_log"] <-
rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_log"]),
mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_log"]),
sd=sd(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_log"]))
dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_sqt"] <-
rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqt"]),
mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqt"]),
sd=sd(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqt"]))
dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_raw"] <-
rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_raw"]),
mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_raw"]),
sd=sd(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_raw"]))
dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", "RT_sqd"] <-
rnorm(n=length(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqd"]),
mean=mean(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqd"]),
sd=sd(dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", "RT_sqd"]))
}
temp <- reshape2::melt(data=dat_transforms,
id.vars=c("dfSubset", "RTtype", "Item", "Vagueness", "Selection", "Quantity", "Order"),
measure.vars=c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd"))
ggplot(temp,
aes(value, colour=RTtype)) +
geom_freqpoly() +
facet_wrap(dfSubset~variable, scales="free", nrow=2) +
theme_bw() + theme(aspect.ratio=1) + scale_color_manual(values=c("blue","red"))
Show how transformations of RT affect distribution of times, and how they affect which times are outliers.
# boxplot subjects and items
load("data.Rda")
# allow reference to transformation
subdata = melt(dd,
measure.vars=c("RT_Rec", "RT_RecSqt", "RT_log", "RT_raw"))
# boxplots for subjects and items separately for each dependent variable
ggplot(subdata) +
facet_wrap(~ variable, scales="free", ncol=4) +
geom_boxplot(aes(y=value, x=Item, col="Items")) +
geom_boxplot(aes(y=value, x=Subject, col="Subjects")) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1), aspect.ratio=1, panel.grid=element_blank()) +
scale_colour_manual("",values=c("Items"="red","Subjects"="blue")) +
xlab("") + ylab("")
Show how mean times for individual subjects and items vary with respect to the grand mean RT.
# Show how mean times for individual subjects and items vary with respect to the grand mean RT.
subs=ddply(dd, .(level=Subject), summarise, effect="subject", emean=mean(dd$RT), lmean=mean(RT), ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0,"slow","fast"))
itms=ddply(dd, .(level=Item), summarise, effect="item", emean=mean(dd$RT), lmean=mean(RT), ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0,"slow","fast"))
si=rbind(subs,itms)
si$effect <- factor(si$effect, levels=c("subject", "item"))
si$lsign <- factor(si$lsign, levels=c("slow","fast"))
my1 <- xyplot(data=si, lmean~level | effect,
aspect=1,
layout=c(2,1),
ylim = extendrange(si$lmean, f = 0.10),
par.settings=list(trellis.par.set(superpose.symbol = list(col = c("blue","red"), pch=19), superpose.line=list(col = c("blue","red")))),
groups=si$lsign,
xlab=NULL,
ylab="RT (in ms)",
key=list(text=list(levels(si$lsign)), lines=list(lty=c(1,1), col=trellis.par.get('superpose.line')$col, pch=c(19,19), type='o'), divide=1),
mean=unique(si$emean),
scales=list(x=list(relation="free", at=list(15.5, 32.5), labels=list("subject number", "item identifier"), cex=1 ),
y=list(at=c(0, 2000, unique(si$emean), 4000, 6000, 8000, 10000), labels=c(0, 2000, "Mean", 4000, 6000, 8000, 10000), cex=.9) ),
panel=function(x,y,type,subscripts,groups=groups,...){
panel.xyplot(x,y,type='p',groups=groups, subscripts=subscripts,...)
panel.arrows(x0=x, y0=unique(si$emean), x1=x, y1=y, groups=groups, subscripts=subscripts, code=3, length=0, col=trellis.par.get('superpose.line')$col[groups][subscripts], ...)
panel.abline(h=unique(si$emean), col="lightgrey",lty=1)
panel.text(x, y=(ifelse(y<unique(si$emean),y-200,y+200)), as.numeric(si$level)[subscripts][panel.number()==1], cex=.75)
panel.text(x, y=(ifelse(y<unique(si$emean),y-200,y+200)), as.character(si$level)[subscripts][panel.number()==2], cex=.75)
})
my2 <- xyplot(data=si, ldiff~level | effect,
aspect=1, type='n',
ylab="Deviation from mean RT (in ms)",
ylim=extendrange(si$ldiff,f=0.10))
print(doubleYScale(my1, my2, add.axis=TRUE, add.ylab2=TRUE, use.style=FALSE))
Show how practice and fatigue effects vary over subjects
# Show how practice/fatigue effects vary over subjects
ggplot(dd, aes(y=RT_log, x=Trial)) + facet_wrap(~Subject,nrow=3) + theme(panel.grid=element_blank(), panel.margin = unit(0, "lines"), panel.background= element_rect(fill = 'white', colour='black' ), legend.position="top") + scale_x_continuous("Trial", breaks=NULL) + geom_point(pch=19, cex=.25, alpha=.5) + stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) + theme(aspect.ratio = 1)
Show practice and fatigue effects vary over items.
# Show how practice/fatigue effects vary over items
ggplot(dd, aes(y=RT_log, x=Trial)) + facet_grid(~Item) + theme(panel.grid=element_blank(), panel.margin = unit(0, "lines"), panel.background= element_rect(fill = 'white', colour='black' ), legend.position="top") + scale_x_continuous("Trial") + geom_point(pch=19, cex=.25, alpha=.5) + stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) + theme(aspect.ratio = 1)
Main effects
# prep main effects rts data frame
vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness")));names(vrts)[3]<-"Levels"
nrts <- cbind(Effect="Selection",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Selection")));names(nrts)[3]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Quantity")));names(qrts)[3]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Order")));names(orts)[3]<-"Levels"
irts <- cbind(Effect="Item",x=factor(c(1,2,3,4)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("Item")));names(irts)[3]<-"Levels"
rts=rbind(vrts,nrts,qrts,orts)
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci
# plot main effects
ggplot(rts, aes(y=RT_log, x=Levels, group=1, ymin=mins, ymax=maxs)) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_grid(~Effect, scale="free_x") +
theme_bw() +
theme(aspect.ratio=1)
Main effects over items
vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Vagueness")));names(vrts)[4]<-"Levels"
nrts <- cbind(Effect="Selection",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Selection")));names(nrts)[4]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Quantity")));names(qrts)[4]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio","Order")));names(orts)[4]<-"Levels"
rts=rbind(vrts,nrts,qrts,orts)
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
ggplot(rts, aes(y=RT_log, x=item_mean_ratio, group=Levels, ymin=mins, ymax=maxs, shape=Levels, fill=Levels, col=Levels)) +
scale_shape_manual(values=c(21,21,22,22,23,23,24,24)) +
scale_fill_manual(values=c("black","gray","black","gray","black","gray","black","gray")) +
scale_color_manual(values=c("black","gray","black","gray","black","gray","black","gray")) +
geom_line() +
geom_errorbar(width=.01) +
geom_point(cex=3) +
facet_grid(~Effect) +
theme_bw() +
theme(aspect.ratio=1,
panel.grid=element_blank()) +
scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1))
2 way interactions ignoring item
# prep 2 ways rts data frame
rts1 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness","Selection")); names(rts1)[1] <- "F1"; names(rts1)[2] <- "F2"
rts1$Effect <- rep("Vagueness:Selection",4)
rts2 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness","Quantity")); names(rts2)[1] <- "F1"; names(rts2)[2] <- "F2"
rts2$Effect <- rep("Vagueness:Quantity",4)
rts3 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Vagueness","Order")); names(rts3)[1] <- "F1"; names(rts3)[2] <- "F2"
rts3$Effect <- rep("Vagueness:Order",4)
rts5 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Selection","Quantity")); names(rts5)[1] <- "F1"; names(rts5)[2] <- "F2"
rts5$Effect <- rep("Selection:Quantity",4)
rts6 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Selection","Order")); names(rts6)[1] <- "F1"; names(rts6)[2] <- "F2"
rts6$Effect <- rep("Selection:Order",4)
rts8 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("Quantity","Order")); names(rts8)[1] <- "F1"; names(rts8)[2] <- "F2"
rts8$Effect <- rep("Quantity:Order",4)
rts <- rbind(rts1,rts2,rts3,rts5,rts6,rts8)
rts <- subset(rts, select=c(Effect, F1, F2, RT_log, RT_log_norm, sd, se, ci))
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci
# plot 2 way interactions
ggplot(rts, aes(y=RT_log, x=F2, group=F1, ymin=mins, ymax=maxs, pch=F1)) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_wrap(~Effect, scale="free_x", nrow=1) +
theme_bw() +
theme(aspect.ratio=1) +
theme(legend.title=element_blank()) +
xlab("Levels")
Plot 2 way interactions over items
# prep 2 ways over items rts data frame
rts1 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Vagueness","Selection")); names(rts1)[2] <- "F1"; names(rts1)[3] <- "F2"
rts1$E1 <- "Vagueness"
rts1$E2 <- "Selection"
rts2 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Vagueness","Quantity")); names(rts2)[2] <- "F1"; names(rts2)[3] <- "F2"
rts2$E1 <- "Vagueness"
rts2$E2 <- "Quantity"
rts3 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Vagueness","Order")); names(rts3)[2] <- "F1"; names(rts3)[3] <- "F2"
rts3$E1 <- "Vagueness"
rts3$E2 <- "Order"
rts4 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Quantity", "Selection")); names(rts4)[2] <- "F1"; names(rts4)[3] <- "F2"
rts4$E1 <- "Quantity"
rts4$E2 <- "Selection"
rts5 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Order","Selection")); names(rts5)[2] <- "F1"; names(rts5)[3] <- "F2"
rts5$E1 <- "Order"
rts5$E2 <- "Selection"
rts6 <- summarySEwithin(dd, measurevar="RT_log", withinvars=c("item_mean_ratio", "Quantity","Order")); names(rts6)[2] <- "F1"; names(rts6)[3] <- "F2"
rts6$E1 <- "Quantity"
rts6$E2 <- "Order"
rts <- rbind(rts1,rts2,rts3,rts4,rts5,rts6)
rts$mins=rts$RT_log-rts$ci
rts$maxs=rts$RT_log+rts$ci
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
rts$F3 <- factor(paste(rts$F1,rts$F2))
rts$E1 <- factor(rts$E1, levels=c("Selection","Order","Quantity","Vagueness"))
rts$E2 <- factor(rts$E2, levels=c("Selection","Order","Quantity","Vagueness"))
# The palette with black:
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
ggplot(rts, aes(y=RT_log, x=item_mean_ratio, group=F3, col=F2, pch=F1)) +
facet_grid(E2~E1, drop=F) +
geom_point(cex=3) + geom_line() + theme_bw() +
theme(aspect.ratio=1, panel.grid=element_blank()) +
scale_color_manual(name="colour",values=cbbPalette) +
scale_shape_discrete(name="shape") +
scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1))
# backing off A LOT from maximal model to avoid warning about maxfun
rtm <- lme4::lmer(
data=dd,
RT_log ~
c_Vag * Selection.sum + item_mean_ratio + c_Qty + c_Ord + s_Trl +
RTprev_log +
( c_Vag * Selection.sum | Subject)
)
rtm.summary <- summary(rtm)
rtm.confint <- confint(rtm, method='Wald')[13:20,]
Print a table of coefficients.
| Estimate | Std. Error | t value | |
|---|---|---|---|
| (Intercept) | 5.99 | 0.10 | 61.1 |
| c_Vag | -0.02 | 0.01 | -1.6 |
| Selection.sum | 0.19 | 0.02 | 10.7 |
| item_mean_ratio | 0.43 | 0.04 | 11.5 |
| c_Qty | -0.00 | 0.01 | -0.4 |
| c_Ord | 0.01 | 0.01 | 1.5 |
| s_Trl | -0.07 | 0.00 | -16.7 |
| RTprev_log | 0.13 | 0.01 | 12.4 |
| c_Vag:Selection.sum | 0.14 | 0.03 | 5.3 |
Plot coefficients
par(las=2, mar=c(14,6,1,1))
y=coef(rtm.summary)[-1,1] # omit intercept
n=length(y)
plot(y, xaxt="n", xlab="", ylab="RT_log\n", pch=19, ylim=extendrange(y, f=.20), main="Coefficients")
abline(h=0)
axis(1, labels=names(y) , at=1:n)
grid()
segments(x0=1:n, x1=1:n, y0=rtm.confint[,1], y1=rtm.confint[,2], lwd=2)
Plot partial effects
par(mfrow=c(2,4))
plotLMER.fnc(rtm)
## effect size (range) for c_Vag is 0.08472117
## effect size (range) for Selection.sum is 0.2597492
## effect size (range) for item_mean_ratio is 0.1308804
## effect size (range) for c_Qty is 0.003404283
## effect size (range) for c_Ord is 0.01320353
## effect size (range) for s_Trl is 0.2538381
## effect size (range) for RTprev_log is 0.4746599
suppressPackageStartupMessages(require(languageR))
cat("k is:",
collin.fnc(dd[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])$cnumber)
## k is: 33.88255
suppressPackageStartupMessages(require(rms))
par(mar=c(1.1,3.1,1.1,1.1), pty='s')
plot(varclus(as.matrix(dd[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])))
Model criticism baayen plots
# Baayen 4-plot model criticism
par(mfrow=c(1,4), pty='s')
# create scaled residuals
dd$rstand = as.vector(scale(resid(rtm)))
# plot scaled residuals density
plot(density(dd$rstand))
# plot sample quantiles versus theoretical quantiles
qqnorm(dd$rstand, cex=.5)
qqline(dd$rstand)
# plot standardised residuals versus fitted values
plot(dd$rstand ~ fitted(rtm), pch='.')
# absolute standardised residuals greater than 2.5 are candidates for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))
Refit model with outliers removed (3.5 sd)
dd2 <- subset(dd, abs(rstand)<3.5 )
# backing off A LOT from maximal model to avoid warning about maxfun
rtm <- lmer(
data=dd2,
RT_log ~
c_Vag * Selection.sum + item_mean_ratio + c_Qty + c_Ord + s_Trl +
RTprev_log +
( c_Vag * Selection.sum | Subject)
)
rtm.summary <- summary(rtm)
rtm.confint <- confint(rtm, method='Wald')
Print a table of coefficients.
| Estimate | Std. Error | df | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| (Intercept) | 5.98 | 0.10 | 473.8 | 61.7 | 0.000 |
| c_Vag | -0.02 | 0.01 | 54.7 | -1.6 | 0.115 |
| Selection.sum | 0.19 | 0.02 | 39.0 | 10.7 | 0.000 |
| item_mean_ratio | 0.44 | 0.04 | 7531.2 | 11.8 | 0.000 |
| c_Qty | -0.01 | 0.01 | 7530.7 | -0.7 | 0.481 |
| c_Ord | 0.02 | 0.01 | 7530.6 | 1.9 | 0.062 |
| s_Trl | -0.07 | 0.00 | 7543.0 | -17.0 | 0.000 |
| RTprev_log | 0.13 | 0.01 | 7587.3 | 12.5 | 0.000 |
| c_Vag:Selection.sum | 0.14 | 0.03 | 40.1 | 5.3 | 0.000 |
Plot coefficients
par(las=2, mar=c(14,6,1,1))
y=coef(rtm.summary)[-1,1] # omit intercept
n=length(y)
plot(y, xaxt="n", xlab="", ylab="RT_log\n", pch=19, ylim=extendrange(y, f=.20), main="Coefficients")
abline(h=0)
axis(1, labels=names(y) , at=1:n)
grid()
segments(x0=1:n, x1=1:n, y0=rtm.confint[13:20,1], y1=rtm.confint[13:20,2], lwd=2)
Plot partial effects
par(mfrow=c(2,4))
plotLMER.fnc(rtm)
## effect size (range) for c_Vag is 0.08361262
## effect size (range) for Selection.sum is 0.2614438
## effect size (range) for item_mean_ratio is 0.1326846
## effect size (range) for c_Qty is 0.006014844
## effect size (range) for c_Ord is 0.01593027
## effect size (range) for s_Trl is 0.2560567
## effect size (range) for RTprev_log is 0.4740353
suppressPackageStartupMessages(require(languageR))
cat("k is:",
collin.fnc(dd2[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])$cnumber
)
## k is: 33.87351
suppressPackageStartupMessages(require(rms))
par(mar=c(1.1,3.1,1.1,1.1), pty='s')
plot(varclus(as.matrix(dd2[,c("c_Trl", "Selection.sum", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio", "RTprev_log")])))
Model criticism baayen plots
# Baayen 4-plot model criticism
par(mfrow=c(1,4), pty='s')
# create scaled residuals
dd2$rstand = as.vector(scale(resid(rtm)))
# plot scaled residuals density
plot(density(dd2$rstand))
# plot sample quantiles versus theoretical quantiles
qqnorm(dd2$rstand, cex=.5)
qqline(dd2$rstand)
# plot standardised residuals versus fitted values
plot(dd2$rstand ~ fitted(rtm), pch='.')
# absolute standardised residuals greater than 2.5 are candidates for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))
Separate analyses for vague vs crisp at each level of selection (matching, and comparison)
not done yet.
| Estimate | Std. Error | df | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| (Intercept) | 5.98 | 0.10 | 473.8 | 61.7 | 0.000 |
| c_Vag | -0.02 | 0.01 | 54.7 | -1.6 | 0.115 |
| Selection.sum | 0.19 | 0.02 | 39.0 | 10.7 | 0.000 |
| item_mean_ratio | 0.44 | 0.04 | 7531.2 | 11.8 | 0.000 |
| c_Qty | -0.01 | 0.01 | 7530.7 | -0.7 | 0.481 |
| c_Ord | 0.02 | 0.01 | 7530.6 | 1.9 | 0.062 |
| s_Trl | -0.07 | 0.00 | 7543.0 | -17.0 | 0.000 |
| RTprev_log | 0.13 | 0.01 | 7587.3 | 12.5 | 0.000 |
| c_Vag:Selection.sum | 0.14 | 0.03 | 40.1 | 5.3 | 0.000 |
not done yet.